## flight from quality robustness checks
##	using oblast level data

## ===================================================================
## PRELIMINARIES
## ===================================================================

require(ggplot2)
require(lfe)
require(data.table)
require(stargazer)

## custom theme
theme_dan <- function(base_size=12){
  	theme_classic(base_size = base_size)+
	theme(axis.line.x = element_line(color='black'), axis.line.y=element_line(color='black')) + 
	theme(legend.position='bottom', legend.title=element_blank())  
}

proper <- function(x){gsub("(?<=\\b)([a-z])", "\\U\\1", tolower(x), perl=TRUE)}

## ===================================================================
## DATA INPUT
## ===================================================================

## SET DATA DIRECTORY HERE
## setwd(...)

## -- -- -- -- -- --
## monthly microdata
mdat <- fread('aggregate_monthly_panel.csv')		
smap <- data.table(input=c('', 'FW11', 'FW12', 'FW13', 'FW14', 'FW15', 'OUTLET', 'SS11', 'SS12', 'SS13', 'SS14', 'SS15'), 
		out=as.Date(c(NA,'2011-09-01', '2012-09-01', '2013-09-01', '2014-09-01', '2015-09-01', NA, 
			'2011-03-01', '2012-03-01', '2013-03-01', '2014-03-01', '2015-03-01'),format='%Y-%m-%d'))
mdat$season <- smap$out[match(mdat$season_1s, smap$input)]
mdat <- mdat[!is.na(mdat$season), ]

## -- -- -- -- -- --
## other data
dt3 <- fread('natural_coding.csv')					## updated product_sku + material data
erdats <- fread('erdats.csv')						## exchange rate data
pdat_raw <- fread('provincial_monthly_panel.csv')	 

mdat$season <- as.Date(mdat$season)					## convert date columns to date type variables
dt3$season <- as.Date(dt3$season)					
erdats$season <- as.Date(erdats$season)


## -- -- -- -- -- -- 
## GDP per capita growth data

gdat_raw <- fread('regional_growth2010-15.csv')
gdat <- melt(gdat_raw, measure.vars=c('yr2010', 'yr2011', 'yr2012', 'yr2013', 'yr2014', 'yr2015'), id.vars=c('region'))
setnames(gdat, c('region', 'year', 'gdppc')) 
gdat$year <- as.numeric(gsub('yr', '', gdat$year))

## cleaning region names:
gdat$region[grepl('KARACHA', gdat$region)] <- 'KARACHAY??�CHERKESS REPUBLIC'
gdat$region[grepl('OSSETIA', gdat$region)] <- 'REPUBLIC OF NORTH OSSETIA??�ALANIA'

## -- -- -- -- -- --
## cross-sectional growth stats on provinces

cdat_raw <- fread('regional_growth.csv')

## cleaning region names:
cdat_raw$region[grepl('KARACHA', cdat_raw$region)] <- 'KARACHAY??�CHERKESS REPUBLIC'
cdat_raw$region[grepl('OSSETIA', cdat_raw$region)] <- 'REPUBLIC OF NORTH OSSETIA??�ALANIA'
cdat_raw$region[grepl('KHANTY', cdat_raw$region)] <- 'KHANTY??�MANSI AUTONOMOUS OKRUG'

cdat <- cdat_raw[, by=.(region), list(econgr2015 = econgr2015[1], unemp_rate = unemp_rate[1], mean_month_income=mean_month_income[1])]


## ===================================================================
## CREATE MAIN DATASET
## ===================================================================


temp <- mdat[, by=.(season_1s), list(season=season[1])]		## add in seasons to pdat_raw
pdat <- merge(pdat_raw, temp, by=c('season_1s'))

pdat <- merge(pdat, dt3, by=c('product_sku', 'brand', 'targetgroup', 'season'))		## add in materials/natural coding to pdat
pdat$year <- year(pdat$season)

pdat <- merge(pdat, gdat, by=c('year', 'region'))
pdat <- merge(pdat, cdat, by=c('region'), all.x=T)

## ===================================================================
## CHEN AND JUVENAL (2018) ANALYSIS, EQUATION (3)
## ===================================================================

## -- -- -- 
## first version: quality-targetgroup is a good over time

## region-quality-targetgroup-year expenditure: dependent variable
## region-quality-targetgroup-year price: dependent variable

## region-year GDP/capita, independent variable
## quality dummy, independent variable
## targetgroup-region-year dummy
## quality-targetgroup-year dummy

pdt <- pdat[year >= 2012, by=.(region, nat, targetgroup, year), list(X=sum(sales*price,na.rm=T), P=sum(sales*price,na.rm=T)/sum(sales,na.rm=T),
			gdppc=gdppc[1])]


pdt$tgpy <- paste(pdt$targetgroup, pdt$region, pdt$year, sep='-')
pdt$mtgy <- paste(pdt$nat, pdt$targetgroup, pdt$year, sep='-')
pdt$mtgp <- paste(pdt$nat, pdt$targetgroup, pdt$region, sep='-')

pdt[, by=.(region, nat, targetgroup), c('dlX', 'dlP', 'dlgdppc') := 
					list(	c(NA, log(X[2:.N]) - log(X[1:(.N-1)])), 
						c(NA, log(P[2:.N]) - log(P[1:(.N-1)])), 
						c(NA, log(gdppc[2:.N]) - log(gdppc[1:(.N-1)])) )]


fe1_X <- felm(dlX ~ dlgdppc:nat | tgpy + mtgy + mtgp | 0 | mtgp, data=pdt)
fe1_P <- felm(dlP ~ dlgdppc:nat | tgpy + mtgy + mtgp | 0 | mtgp, data=pdt)


## -- -- -- 
## second version: quality-targetgroup-BRAND is a good over time

pdtb <- pdat[year >= 2012, by=.(region, nat, brand, targetgroup, year), list(X=sum(sales*price,na.rm=T), P=sum(sales*price,na.rm=T)/sum(sales,na.rm=T), 
			gdppc=gdppc[1])]
pdtb$tgbpy <- paste(pdtb$targetgroup, pdtb$region, pdtb$brand, pdtb$year, sep='-')
pdtb$mtgby <- paste(pdtb$nat, pdtb$targetgroup, pdtb$brand, pdtb$year, sep='-')
pdtb$mtgbp <- paste(pdtb$nat, pdtb$targetgroup, pdtb$brand, pdtb$region, sep='-')

pdtb$mtgp <- paste(pdtb$nat, pdtb$targetgroup, pdtb$region, sep='-')


pdtb <- pdtb[order(region, nat, targetgroup, brand, year), ]
pdtb$ok <- c(0, 1*(	pdtb$region[2:nrow(pdtb)] == pdtb$region[1:(nrow(pdtb)-1)] & 
				pdtb$nat[2:nrow(pdtb)] == pdtb$nat[1:(nrow(pdtb)-1)] & 
				pdtb$targetgroup[2:nrow(pdtb)] == pdtb$targetgroup[1:(nrow(pdtb)-1)] & 
				pdtb$brand[2:nrow(pdtb)] == pdtb$brand[1:(nrow(pdtb)-1)] & 
				(pdtb$year[2:nrow(pdtb)] == pdtb$year[1:(nrow(pdtb)-1)] + 1)))

pdtb$dlX <- c(NA, log(pdtb$X[2:nrow(pdtb)]) - log(pdtb$X[1:(nrow(pdtb)-1)]))
pdtb$dlP <- c(NA, log(pdtb$P[2:nrow(pdtb)]) - log(pdtb$P[1:(nrow(pdtb)-1)]))
pdtb$dlgdppc <- c(NA, log(pdtb$gdppc[2:nrow(pdtb)]) - log(pdtb$gdppc[1:(nrow(pdtb)-1)]))

pdtb[, c('dlX', 'dlP', 'dlgdppc') := list(ifelse(ok==1, dlX, NA), ifelse(ok==1, dlP, NA), ifelse(ok==1, dlgdppc, NA))]


fe2_X <- felm(dlX ~ dlgdppc:nat | tgbpy + mtgby + mtgp | 0 | 0, data=pdtb[!is.na(dlgdppc) & dlX > -Inf, ])
fe2_P <- felm(dlP ~ dlgdppc:nat | tgbpy + mtgby + mtgp | 0 | 0, data=pdtb[!is.na(dlgdppc) & dlX > -Inf, ])


## -- -- -- -- -- -- -- -- -- --
## OUTPUT

## TABLE B.12

stargazer(fe1_X, fe1_P,  
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(c('Group $\\times$ Oblast $\\times$ Year FE', '\\checkmark', '\\checkmark'),
					c('Quality $\\times$ Group $\\times$ Year FE', '\\checkmark', '\\checkmark'),
		 			c('Quality $\\times$ Group $\\times$ Oblast FE', '\\checkmark', '\\checkmark')
					))


## TABLE B.13

stargazer(fe2_X, fe2_P,  
	column.sep.width='0pt', no.space=T, align=TRUE, omit='Constant', omit.stat=c('adj.rsq', 'ser'),
	star.cutoffs = c(0.05, 0.01, 0.001),
		add.lines=list(c('Group $\\times$ Brand $\\times$ Oblast $\\times$ Year FE', '\\checkmark', '\\checkmark'),
					c('Quality $\\times$ Group $\\times$ Brand $\\times$ Year FE', '\\checkmark', '\\checkmark'),
		 			c('Quality $\\times$ Group $\\times$ Oblast FE', '\\checkmark', '\\checkmark')
					))




## ===================================================================
## ALTERNATIVE FLIGHT FROM QUALITY TEST
## ===================================================================

## SET OUTPUT DIRECTORY HERE
## setwd(...)


## quality share in group g, season t, region c
## 	growth rate in region c, interacted with season t
##	income and unemployment in c, interacted with season t
##	regional sales in gct
##	fixed effects at gt and gc level

seasons <- sort(unique(pdat$season))

pdt2 <- pdat[year >= 2012, by=.(region, targetgroup, season), 
			list(	natfrac = length(unique(product_sku[nat==1]))/length(unique(product_sku)), 
				P=sum(sales*price,na.rm=T)/sum(sales,na.rm=T),
				econgr2015=econgr2015[1], unemp_rate=unemp_rate[1], mean_month_income=mean_month_income[1])]


pdt2$tgs <- paste(pdt2$targetgroup, pdt2$season, sep='-')
pdt2$tgc <- paste(pdt2$targetgroup, pdt2$region, sep='-')


pdt2$s1 <- 1*(pdt2$season == as.Date('2012-03-01'))
pdt2$s2 <- 1*(pdt2$season == as.Date('2012-09-01'))
pdt2$s3 <- 1*(pdt2$season == as.Date('2013-03-01'))
pdt2$s4 <- 1*(pdt2$season == as.Date('2013-09-01'))
pdt2$s5 <- 1*(pdt2$season == as.Date('2014-03-01'))
pdt2$s6 <- 1*(pdt2$season == as.Date('2014-09-01'))
pdt2$s7 <- 1*(pdt2$season == as.Date('2015-03-01'))
pdt2$s8 <- 1*(pdt2$season == as.Date('2015-09-01'))


fe2_natfrac <- felm(natfrac ~ econgr2015:s2 + econgr2015:s3 + econgr2015:s4 + econgr2015:s5 +     
				econgr2015:s6 + econgr2015:s7 + econgr2015:s8 | 
				tgs + tgc | 0 | tgc, data=pdt2)
fe2_P <- felm(P ~ econgr2015:s2 + econgr2015:s3 + econgr2015:s4 + econgr2015:s5 +     
				econgr2015:s6 + econgr2015:s7 + econgr2015:s8 | 
				tgs + tgc | 0 | tgc, data=pdt2)

ee_dt2 <- data.table(season = seasons[seasons >= as.Date('2012-03-01')], coef=c(0, coef(fe2_natfrac)), se=c(NA, fe2_natfrac$cse))
p1 <- ggplot(ee_dt2, aes(x=season, y=coef)) + geom_errorbar(aes(ymin=coef-1.96*se, ymax=coef + 1.96*se), width=20) + geom_point() + 
		theme_dan() + theme(axis.title.x=element_blank()) + ylab('Coefficient') + geom_hline(yintercept=0, linetype='dashed', col='red')

ggsave('robustness_flight_from_quality_timing_test.pdf', p1, height=4, width=6)
dev.off()







##